{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "tips = sns.load_dataset('tips')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>16.99</td>\n",
       "      <td>1.01</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.34</td>\n",
       "      <td>1.66</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>21.01</td>\n",
       "      <td>3.50</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.68</td>\n",
       "      <td>3.31</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24.59</td>\n",
       "      <td>3.61</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   total_bill   tip     sex smoker  day    time  size\n",
       "0       16.99  1.01  Female     No  Sun  Dinner     2\n",
       "1       10.34  1.66    Male     No  Sun  Dinner     3\n",
       "2       21.01  3.50    Male     No  Sun  Dinner     3\n",
       "3       23.68  3.31    Male     No  Sun  Dinner     2\n",
       "4       24.59  3.61  Female     No  Sun  Dinner     4"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import statsmodels.formula.api as smf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = smf.ols('tip ~ total_bill', data=tips)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "results = model.fit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>           <td>tip</td>       <th>  R-squared:         </th> <td>   0.457</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.454</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   203.4</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Fri, 11 May 2018</td> <th>  Prob (F-statistic):</th> <td>6.69e-34</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>18:19:28</td>     <th>  Log-Likelihood:    </th> <td> -350.54</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>   244</td>      <th>  AIC:               </th> <td>   705.1</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>   242</td>      <th>  BIC:               </th> <td>   712.1</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     1</td>      <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "       <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Intercept</th>  <td>    0.9203</td> <td>    0.160</td> <td>    5.761</td> <td> 0.000</td> <td>    0.606</td> <td>    1.235</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>total_bill</th> <td>    0.1050</td> <td>    0.007</td> <td>   14.260</td> <td> 0.000</td> <td>    0.091</td> <td>    0.120</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>20.185</td> <th>  Durbin-Watson:     </th> <td>   2.151</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.000</td> <th>  Jarque-Bera (JB):  </th> <td>  37.750</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td> 0.443</td> <th>  Prob(JB):          </th> <td>6.35e-09</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 4.711</td> <th>  Cond. No.          </th> <td>    53.0</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                    tip   R-squared:                       0.457\n",
       "Model:                            OLS   Adj. R-squared:                  0.454\n",
       "Method:                 Least Squares   F-statistic:                     203.4\n",
       "Date:                Fri, 11 May 2018   Prob (F-statistic):           6.69e-34\n",
       "Time:                        18:19:28   Log-Likelihood:                -350.54\n",
       "No. Observations:                 244   AIC:                             705.1\n",
       "Df Residuals:                     242   BIC:                             712.1\n",
       "Df Model:                           1                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "Intercept      0.9203      0.160      5.761      0.000       0.606       1.235\n",
       "total_bill     0.1050      0.007     14.260      0.000       0.091       0.120\n",
       "==============================================================================\n",
       "Omnibus:                       20.185   Durbin-Watson:                   2.151\n",
       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               37.750\n",
       "Skew:                           0.443   Prob(JB):                     6.35e-09\n",
       "Kurtosis:                       4.711   Cond. No.                         53.0\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "\"\"\""
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Intercept     0.920270\n",
       "total_bill    0.105025\n",
       "dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results.params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn import linear_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "lr = linear_model.LinearRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "Expected 2D array, got 1D array instead:\narray=[ 16.99  10.34  21.01  23.68  24.59  25.29   8.77  26.88  15.04  14.78\n  10.27  35.26  15.42  18.43  14.83  21.58  10.33  16.29  16.97  20.65\n  17.92  20.29  15.77  39.42  19.82  17.81  13.37  12.69  21.7   19.65\n   9.55  18.35  15.06  20.69  17.78  24.06  16.31  16.93  18.69  31.27\n  16.04  17.46  13.94   9.68  30.4   18.29  22.23  32.4   28.55  18.04\n  12.54  10.29  34.81   9.94  25.56  19.49  38.01  26.41  11.24  48.27\n  20.29  13.81  11.02  18.29  17.59  20.08  16.45   3.07  20.23  15.01\n  12.02  17.07  26.86  25.28  14.73  10.51  17.92  27.2   22.76  17.29\n  19.44  16.66  10.07  32.68  15.98  34.83  13.03  18.28  24.71  21.16\n  28.97  22.49   5.75  16.32  22.75  40.17  27.28  12.03  21.01  12.46\n  11.35  15.38  44.3   22.42  20.92  15.36  20.49  25.21  18.24  14.31  14.\n   7.25  38.07  23.95  25.71  17.31  29.93  10.65  12.43  24.08  11.69\n  13.42  14.26  15.95  12.48  29.8    8.52  14.52  11.38  22.82  19.08\n  20.27  11.17  12.26  18.26   8.51  10.33  14.15  16.    13.16  17.47\n  34.3   41.19  27.05  16.43   8.35  18.64  11.87   9.78   7.51  14.07\n  13.13  17.26  24.55  19.77  29.85  48.17  25.    13.39  16.49  21.5\n  12.66  16.21  13.81  17.51  24.52  20.76  31.71  10.59  10.63  50.81\n  15.81   7.25  31.85  16.82  32.9   17.89  14.48   9.6   34.63  34.65\n  23.33  45.35  23.17  40.55  20.69  20.9   30.46  18.15  23.1   15.69\n  19.81  28.44  15.48  16.58   7.56  10.34  43.11  13.    13.51  18.71\n  12.74  13.    16.4   20.53  16.47  26.59  38.73  24.27  12.76  30.06\n  25.89  48.33  13.27  28.17  12.9   28.15  11.59   7.74  30.14  12.16\n  13.42   8.58  15.98  13.42  16.27  10.09  20.45  13.28  22.12  24.01\n  15.69  11.61  10.77  15.53  10.07  12.6   32.83  35.83  29.03  27.18\n  22.67  17.82  18.78].\nReshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-17-3a4caf5accd0>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mpredicted\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'total_bill'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'tip'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\base.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m    480\u001b[0m         \u001b[0mn_jobs_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    481\u001b[0m         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n\u001b[1;32m--> 482\u001b[1;33m                          y_numeric=True, multi_output=True)\n\u001b[0m\u001b[0;32m    483\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    484\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0matleast_1d\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[1;34m(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m    571\u001b[0m     X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite,\n\u001b[0;32m    572\u001b[0m                     \u001b[0mensure_2d\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mallow_nd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mensure_min_samples\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 573\u001b[1;33m                     ensure_min_features, warn_on_dtype, estimator)\n\u001b[0m\u001b[0;32m    574\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    575\u001b[0m         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m    439\u001b[0m                     \u001b[1;34m\"Reshape your data either using array.reshape(-1, 1) if \"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    440\u001b[0m                     \u001b[1;34m\"your data has a single feature or array.reshape(1, -1) \"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 441\u001b[1;33m                     \"if it contains a single sample.\".format(array))\n\u001b[0m\u001b[0;32m    442\u001b[0m             \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0matleast_2d\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    443\u001b[0m             \u001b[1;31m# To ensure that array flags are maintained\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mValueError\u001b[0m: Expected 2D array, got 1D array instead:\narray=[ 16.99  10.34  21.01  23.68  24.59  25.29   8.77  26.88  15.04  14.78\n  10.27  35.26  15.42  18.43  14.83  21.58  10.33  16.29  16.97  20.65\n  17.92  20.29  15.77  39.42  19.82  17.81  13.37  12.69  21.7   19.65\n   9.55  18.35  15.06  20.69  17.78  24.06  16.31  16.93  18.69  31.27\n  16.04  17.46  13.94   9.68  30.4   18.29  22.23  32.4   28.55  18.04\n  12.54  10.29  34.81   9.94  25.56  19.49  38.01  26.41  11.24  48.27\n  20.29  13.81  11.02  18.29  17.59  20.08  16.45   3.07  20.23  15.01\n  12.02  17.07  26.86  25.28  14.73  10.51  17.92  27.2   22.76  17.29\n  19.44  16.66  10.07  32.68  15.98  34.83  13.03  18.28  24.71  21.16\n  28.97  22.49   5.75  16.32  22.75  40.17  27.28  12.03  21.01  12.46\n  11.35  15.38  44.3   22.42  20.92  15.36  20.49  25.21  18.24  14.31  14.\n   7.25  38.07  23.95  25.71  17.31  29.93  10.65  12.43  24.08  11.69\n  13.42  14.26  15.95  12.48  29.8    8.52  14.52  11.38  22.82  19.08\n  20.27  11.17  12.26  18.26   8.51  10.33  14.15  16.    13.16  17.47\n  34.3   41.19  27.05  16.43   8.35  18.64  11.87   9.78   7.51  14.07\n  13.13  17.26  24.55  19.77  29.85  48.17  25.    13.39  16.49  21.5\n  12.66  16.21  13.81  17.51  24.52  20.76  31.71  10.59  10.63  50.81\n  15.81   7.25  31.85  16.82  32.9   17.89  14.48   9.6   34.63  34.65\n  23.33  45.35  23.17  40.55  20.69  20.9   30.46  18.15  23.1   15.69\n  19.81  28.44  15.48  16.58   7.56  10.34  43.11  13.    13.51  18.71\n  12.74  13.    16.4   20.53  16.47  26.59  38.73  24.27  12.76  30.06\n  25.89  48.33  13.27  28.17  12.9   28.15  11.59   7.74  30.14  12.16\n  13.42   8.58  15.98  13.42  16.27  10.09  20.45  13.28  22.12  24.01\n  15.69  11.61  10.77  15.53  10.07  12.6   32.83  35.83  29.03  27.18\n  22.67  17.82  18.78].\nReshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample."
     ]
    }
   ],
   "source": [
    "predicted = lr.fit(X=tips['total_bill'], y=tips['tip'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "predicted = lr.fit(X=tips['total_bill'].values.reshape(-1, 1),\n",
    "                   y=tips['tip'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.10502452])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predicted.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.92026961355467352"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predicted.intercept_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model = smf.ols('tip ~ total_bill + sex', data=tips).fit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>           <td>tip</td>       <th>  R-squared:         </th> <td>   0.457</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.452</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   101.3</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Fri, 11 May 2018</td> <th>  Prob (F-statistic):</th> <td>1.18e-32</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>18:20:35</td>     <th>  Log-Likelihood:    </th> <td> -350.52</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>   244</td>      <th>  AIC:               </th> <td>   707.0</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>   241</td>      <th>  BIC:               </th> <td>   717.5</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     2</td>      <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "        <td></td>           <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Intercept</th>     <td>    0.9067</td> <td>    0.175</td> <td>    5.182</td> <td> 0.000</td> <td>    0.562</td> <td>    1.251</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>sex[T.Female]</th> <td>    0.0266</td> <td>    0.138</td> <td>    0.192</td> <td> 0.848</td> <td>   -0.246</td> <td>    0.299</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>total_bill</th>    <td>    0.1052</td> <td>    0.007</td> <td>   14.110</td> <td> 0.000</td> <td>    0.091</td> <td>    0.120</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>20.499</td> <th>  Durbin-Watson:     </th> <td>   2.149</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.000</td> <th>  Jarque-Bera (JB):  </th> <td>  38.652</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td> 0.447</td> <th>  Prob(JB):          </th> <td>4.05e-09</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 4.733</td> <th>  Cond. No.          </th> <td>    63.0</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                    tip   R-squared:                       0.457\n",
       "Model:                            OLS   Adj. R-squared:                  0.452\n",
       "Method:                 Least Squares   F-statistic:                     101.3\n",
       "Date:                Fri, 11 May 2018   Prob (F-statistic):           1.18e-32\n",
       "Time:                        18:20:35   Log-Likelihood:                -350.52\n",
       "No. Observations:                 244   AIC:                             707.0\n",
       "Df Residuals:                     241   BIC:                             717.5\n",
       "Df Model:                           2                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "=================================================================================\n",
       "                    coef    std err          t      P>|t|      [0.025      0.975]\n",
       "---------------------------------------------------------------------------------\n",
       "Intercept         0.9067      0.175      5.182      0.000       0.562       1.251\n",
       "sex[T.Female]     0.0266      0.138      0.192      0.848      -0.246       0.299\n",
       "total_bill        0.1052      0.007     14.110      0.000       0.091       0.120\n",
       "==============================================================================\n",
       "Omnibus:                       20.499   Durbin-Watson:                   2.149\n",
       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               38.652\n",
       "Skew:                           0.447   Prob(JB):                     4.05e-09\n",
       "Kurtosis:                       4.733   Cond. No.                         63.0\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "\"\"\""
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "lr = linear_model.LinearRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "could not convert string to float: 'Female'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-25-1e0106ce31da>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mpredicted\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtips\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'total_bill'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'sex'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtips\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'tip'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\base.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m    480\u001b[0m         \u001b[0mn_jobs_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    481\u001b[0m         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n\u001b[1;32m--> 482\u001b[1;33m                          y_numeric=True, multi_output=True)\n\u001b[0m\u001b[0;32m    483\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    484\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0matleast_1d\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[1;34m(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m    571\u001b[0m     X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite,\n\u001b[0;32m    572\u001b[0m                     \u001b[0mensure_2d\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mallow_nd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mensure_min_samples\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 573\u001b[1;33m                     ensure_min_features, warn_on_dtype, estimator)\n\u001b[0m\u001b[0;32m    574\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    575\u001b[0m         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m    446\u001b[0m         \u001b[1;31m# make sure we actually converted to numeric:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    447\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mdtype_numeric\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkind\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"O\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 448\u001b[1;33m             \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    449\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mallow_nd\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>=\u001b[0m \u001b[1;36m3\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    450\u001b[0m             raise ValueError(\"Found array with dim %d. %s expected <= 2.\"\n",
      "\u001b[1;31mValueError\u001b[0m: could not convert string to float: 'Female'"
     ]
    }
   ],
   "source": [
    "predicted = lr.fit(tips[['total_bill', 'sex']], tips['tip'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "tips_dummy = pd.get_dummies(tips[['total_bill', 'sex', 'tip']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex_Male</th>\n",
       "      <th>sex_Female</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>16.99</td>\n",
       "      <td>1.01</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.34</td>\n",
       "      <td>1.66</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>21.01</td>\n",
       "      <td>3.50</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.68</td>\n",
       "      <td>3.31</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24.59</td>\n",
       "      <td>3.61</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   total_bill   tip  sex_Male  sex_Female\n",
       "0       16.99  1.01         0           1\n",
       "1       10.34  1.66         1           0\n",
       "2       21.01  3.50         1           0\n",
       "3       23.68  3.31         1           0\n",
       "4       24.59  3.61         0           1"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips_dummy.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 244 entries, 0 to 243\n",
      "Data columns (total 7 columns):\n",
      "total_bill    244 non-null float64\n",
      "tip           244 non-null float64\n",
      "sex           244 non-null category\n",
      "smoker        244 non-null category\n",
      "day           244 non-null category\n",
      "time          244 non-null category\n",
      "size          244 non-null int64\n",
      "dtypes: category(4), float64(2), int64(1)\n",
      "memory usage: 7.2 KB\n"
     ]
    }
   ],
   "source": [
    "tips.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tip</th>\n",
       "      <th>total_bill</th>\n",
       "      <th>sex_Female</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.01</td>\n",
       "      <td>16.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.66</td>\n",
       "      <td>10.34</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.50</td>\n",
       "      <td>21.01</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.31</td>\n",
       "      <td>23.68</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3.61</td>\n",
       "      <td>24.59</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    tip  total_bill  sex_Female\n",
       "0  1.01       16.99           1\n",
       "1  1.66       10.34           0\n",
       "2  3.50       21.01           0\n",
       "3  3.31       23.68           0\n",
       "4  3.61       24.59           1"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips_dummy = pd.get_dummies(tips[['tip', 'total_bill', 'sex']], drop_first=True)\n",
    "tips_dummy.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "predicted = lr.fit(tips_dummy.iloc[:, 1:], tips_dummy['tip'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.10523236,  0.02660871])"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predicted.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tip</th>\n",
       "      <th>total_bill</th>\n",
       "      <th>sex_Female</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.01</td>\n",
       "      <td>16.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.66</td>\n",
       "      <td>10.34</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.50</td>\n",
       "      <td>21.01</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.31</td>\n",
       "      <td>23.68</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3.61</td>\n",
       "      <td>24.59</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    tip  total_bill  sex_Female\n",
       "0  1.01       16.99           1\n",
       "1  1.66       10.34           0\n",
       "2  3.50       21.01           0\n",
       "3  3.31       23.68           0\n",
       "4  3.61       24.59           1"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips_dummy.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from patsy import dmatrices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "response, predictors = dmatrices('tip ~ total_bill + sex', data = tips)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "#response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#predictors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "predicted = lr.fit(predictors, response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.        ,  0.02660871,  0.10523236]])"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predicted.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
